#=====Lecture 9 r code: statistics


#===== Plot the standard normal curve
x <- seq(-4,4,0.001)
plot(x=x, y=dnorm(x), type="l", lwd=2, col="blue",yaxs="i", ylim=c(0,0.45),
     xlab="Value of x", ylab="Density")

# create the explanatory plot
par(mfrow=c(2,1))
x <- seq(-4,4,0.001)
plot(x=x, y=dnorm(x), type="l", lwd=2, col="blue",yaxs="i",
     ylim=c(0,0.45),xlab="Value of x", ylab="Density",  xaxt="n")
lines(x=x[x<(-1.96)], y=dnorm(x[x<(-1.96)]),
      type="h", col="#77777777")
axis(at=c(-4,-1.96,0,1.96, 4),lab=c("-4","-x", "0","+x", "4"),side=1)

plot(x=x, y=dnorm(x), type="l", lwd=2, col="blue",yaxs="i",
     ylim=c(0,0.45),xlab="Value of x", ylab="Density", xaxt="n")
lines(x=x[x<1.96], y=dnorm(x[x<1.96]),
      type="h", col="#77777777")
axis(at=c(-4,-1.96,0,1.96, 4),lab=c("-4","-x", "0","+x", "4"),side=1)

## show qnorm
quants <- qnorm(c(0.01,0.025,0.05,0.95,0.975,0.99))
round(quants,2)

## show pnorm
pnorm(quants)
pnorm(-2.33)
pnorm(-2.33, lower.tail=F)

pnorm(-2.33) + pnorm(-2.33, lower.tail=F)

# demonstrate dnorm and rnorm
dnorm(quants)
rnorm(n=10)



#===random numbers
?.Random.seed
?set.seed
.Random.seed[1:4]
rnorm(1)
.Random.seed[1:4]
rnorm(1)
.Random.seed[1:4]

set.seed(100)
rnorm(3)
rnorm(3)
set.seed(100)
rnorm(3)
rnorm(3)

2^31-1
-2^31
rnorm(5)

#=====using the sample function
#roll some dice 10 times
sample(1:6,size=10,replace=T)

#flip a coin 10 times
sample(c("H","T"), size=10, replace=T)

#select five cards from a deck
cards <- paste(rep(c("A",2:10,"J","Q","K"),4),
               c("Club","Diamond","Heart","Spade"))
sort(cards)  #check that this worked! 
sample(cards,5)

#times out of 10000 you select 0,1,2,3,4,5 hearts
x <- vector(length=10000)
for (i in 1:10000) {
  x[i] <- length(grep("Heart", sample(cards,5)))
}
table(x)
#Note: grep searches for a matching regular expression and return the number of matches.

#first two cards both Aces
x <- vector(length=10000)
for (i in 1:10000) {
  x[i] <- 2==length(grep("A", sample(cards,2)))
}
table(x)

#4 aces out of 7 cards (Texas hold 'em)
x <- vector(length=100000)
for (i in 1:100000) {
  x[i] <- length(grep("A", sample(cards,7)))>=4
}
table(x)

#=======Hands-on exercise 1
require(plyr)
require(TeachingDemos)
Fbomb<-rnorm(100, mean=80, sd=10)
f<-length(which(Fbomb>100 && Fbomb<60))/length(Fbomb)
gib<-pnorm(0.24, mean=0,sd=1, lower.tail=T,log.p=F)
gib

#=====exploratory data analysis
summary(iris)
fivenum(iris[,3]) #min, lower quart, mean, upper q, max for petal length
sd(iris[,3]) # sd for petal length
apply(iris[,1:4], 2, sd, na.rm=T) # sd for each variable, omit spp.
apply(iris[,1:4], 2, range, na.rm=T) #min and max for each variable, omit spp.

boxplot(iris)

pairs(iris[,1:4])

pairs(iris[,1:4], main = "Edgar Anderson's Iris Data", pch = 19,
      col = rep(c("#FF000055", "#00FF0055", "#0000FF55"), c(50,50,50)))
pairs(iris[,1:4], main = "Edgar Anderson's Iris Data", pch = 21,
      bg = rep(c("red", "green3", "blue"), table(iris$Species)))

#====histogram
hist(iris$Petal.Length, main="", col="gray", xlab="Petal length (cm)")
table(iris$Species)
table(iris$Petal.Length)
par(mfrow=c(1,3))
hist(iris[iris$Species=="setosa",]$Petal.Length, ylim=c(0,50),
     breaks=seq(1,7,0.5), main="setosa", col="gray", xlab="Petal length (cm)")
hist(iris[iris$Species=="versicolor",]$Petal.Length, ylim=c(0,50), 
     breaks=seq(1,7,0.5), main="versicolor", col="gray", xlab="Petal length (cm)")
hist(iris[iris$Species=="virginica",]$Petal.Length,  ylim=c(0,50),
     breaks=seq(1,7,0.5), main="virginica", col="gray", xlab="Petal length (cm)")

#==adding density curves
par(mfrow=c(1,1))
hist(iris$Petal.Length, main="", col="gray", xlab="Petal length (cm)",
     freq=F)
lines(density(iris$Petal.Length), lwd=2, col="red")
lines(density(iris$Petal.Length,adjust=0.4), lwd=2, col="blue")
lines(density(iris$Petal.Length,adjust=2), lwd=2, col="green3")


#=====using t-test
methodA <- c(79.982, 80.041, 80.018, 80.041, 80.03, 80.029, 80.038, 79.968, 80.049, 80.029, 80.019, 80.002, 80.022)
methodB <- c(80.02, 79.939, 79.98, 79.971, 79.97, 80.029, 79.952, 79.968)

#briefly examine assumptions
boxplot(methodA, methodB, names = c("A", "B"))
par(mfrow = c(1,2))
qqnorm(methodA)
qqline(methodA)
qqnorm(methodB); qqline(methodB)

t.test(methodA, methodB)
resultsAB <- t.test(methodA, methodB)
names(resultsAB)
resultsAB$p.value

var(methodA)
var(methodB)

#test of equal variances
var.test(methodA, methodB)

#use equal variances
t.test(methodA, methodB, var.equal = TRUE)

#Mann-Whitney test
wilcox.test(methodA, methodB)

#======Hands-on Exercise 2
farb<-rnorm(30, mean=0, sd=1)
farg<-rexp(30, rate=1)
farp<-rt(30,df=Inf)
farn<-rcauchy(30, location=0,scale=1)

qqnorm(farb)
qqline(farb)
qqnorm(farg)
qqline(farg)
qqnorm(farp)
qqline(farp)
qqnorm(farn)
qqline(farn)